// mmap-fst.h

//The mmap fst type is a modified version or the const-fst.h
//The Mmap wrapper class is taken from mozc project


// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//


// Copyright 2010-2011, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#ifndef FSTEX_LIB_MMAP_FST_H__
#define FSTEX_LIB_MMAP_FST_H__

#include <string>
#include <vector>
using std::vector;
#include <fst/expanded-fst.h>
#include <fst/fst-decl.h>  // For optional argument declarations
#include <fst/test-properties.h>
#include <fst/util.h>

extern "C" {
#ifdef OS_WINDOWS
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
}
#include <string>


// Change HAVE_MMAP 0 when target os has no mmap call
#define HAVE_MMAP 1

#ifndef O_BINARY
#define O_BINARY 0
#endif



namespace fst {

	template <class T>
	class Mmap {
	public:
		T&       operator[](size_t n)       { return *(text_ + n); }
		const T& operator[](size_t n) const { return *(text_ + n); }
		T*       begin()           { return text_; }
		const T* begin()    const  { return text_; }
		T*       end()             { return text_ + Size(); }
		const T* end()    const    { return text_ + Size(); }
		int      Size()            { return length_/sizeof(T); }
		int      GetFileSize()     { return length_; }
		bool     Empty()           { return (length_ == 0); }

#ifdef OS_WINDOWS
		bool Open(const char *filename, const char *mode = "r") {
			Close();
			uint32 mode1, mode2, mode3, mode4;

			if (strcmp(mode, "r") == 0) {
				mode1 = GENERIC_READ;
				mode2 = PAGE_READONLY;
				mode3 = FILE_MAP_READ;
				mode4 = FILE_SHARE_READ;
			} else if (strcmp(mode, "r+") == 0) {
				mode1 = GENERIC_READ | GENERIC_WRITE;
				mode2 = PAGE_READWRITE;
				mode3 = FILE_MAP_ALL_ACCESS;
				mode4 = FILE_SHARE_READ | FILE_SHARE_WRITE;
			} else {
				LOG(ERROR) << "unknown open mode:" << filename;
				return false;
			}

			wstring filename_wide = UTF8ToUTF16(filename);
			/*if (Util::UTF8ToWide(filename, &filename_wide) <= 0) {
				return false;
			}*/

			handle_ = ::CreateFileW(filename_wide.c_str(), mode1, mode4,
				0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
			if (handle_ == INVALID_HANDLE_VALUE) {
				LOG(ERROR) << "CreateFile() failed: " << filename;
				return false;
			}

			length_ = ::GetFileSize(handle_, 0);

			map_handle_ = ::CreateFileMapping(handle_, 0, mode2, 0, 0, 0);
			if (map_handle_ == NULL) {
				LOG(ERROR) << "CreateFileMapping() failed: " << filename;
				return false;
			}

			text_ = reinterpret_cast<T *>(::MapViewOfFile(map_handle_, mode3, 0, 0, 0));
			if (text_ == NULL) {
				LOG(WARNING) << "MapViewOfFile() failed: " << filename;
				return false;
			}

			return true;
		}

		void Close() {
			if (text_ != NULL) {
				::UnmapViewOfFile(text_);
			}
			if (handle_ != INVALID_HANDLE_VALUE) {
				::CloseHandle(handle_);
				handle_ = INVALID_HANDLE_VALUE;
			}
			if (map_handle_ != NULL) {
				::CloseHandle(map_handle_);
				map_handle_ = NULL;
			}
			text_ = NULL;
		}

		Mmap() : text_(NULL), length_(0),
			handle_(INVALID_HANDLE_VALUE), map_handle_(NULL) {}

#else  // OS_WINDOWS

		bool Open(const char *filename, const char *mode = "r") {
			Close();
			struct stat st;

			if (strcmp(mode, "r") == 0) {
				flag_ = O_RDONLY;
			} else if (strcmp(mode, "r+") == 0) {
				flag_ = O_RDWR;
			} else {
				LOG(WARNING) << "unknown open mode: " << filename;
				return false;
			}

			if ((fd_ = ::open(filename, flag_ | O_BINARY)) < 0) {
				LOG(WARNING) << "open failed: " << filename;
				return false;
			}

			if (fstat(fd_, &st) < 0) {
				LOG(WARNING) << "failed to get file size: " << filename;
				return false;
			}

			length_ = st.st_size;

#ifdef HAVE_MMAP
			int prot = PROT_READ;
			if (flag_ == O_RDWR) {
				prot |= PROT_WRITE;
			}

			char *p = NULL;
			if ((p = reinterpret_cast<char *>
				(mmap(0, length_, prot, MAP_SHARED, fd_, 0))) == MAP_FAILED) {
					LOG(WARNING) << "mmap() failed: " << filename;
					return false;
			}

			// We don't check the return value because it fails in some environments.
			// For linux, in the kernel version >= 2.6.9, user process can mlock.
			// In older kernel, it fails if the process is running in user priviledge.
#ifndef OS_WINDOWS
			mlock(p, length_);
#endif
			text_ = reinterpret_cast<T *>(p);
#else
			text_ = new T[length_];
			if (read(fd_, text_, length_) <= 0) {
				LOG(WARNING) << "read() failed: " << filename;
				return false;
			}
#endif
			::close(fd_);
			fd_ = -1;

			return true;
		}

		void Close() {
			if (fd_ >= 0) {
				::close(fd_);
				fd_ = -1;
			}

			if (text_ != NULL) {
#ifdef HAVE_MMAP
#ifndef OS_WINDOWS
				// Again, we don't check the return value here.
				munlock(text_, length_);
#endif  // OS_WINDOWS
				munmap(reinterpret_cast<char *>(text_), length_);
				text_ = NULL;
#else  // HAVE_MMAP
				if (flag_ == O_RDWR) {
					int fd2;
					if ((fd2 = ::open(fileName.c_str(), O_RDWR)) >= 0) {
						write(fd2, text_, length_);
						::close(fd2);
					}
				}
				delete [] text_;
#endif  // HAVE_MMAP
			}

			text_ = NULL;
		}

		Mmap(): text_(NULL), length_(0), fd_(-1) {}
#endif   // OS_WINDOWS

		virtual ~Mmap() {
			Close();
		}

	private:
		wstring UTF8ToUTF16(const string& utf8) {
			wstring utf16;
			utf16.reserve(utf8.size());
			for ( size_t i = 0; i < utf8.size(); ++i ) {
				unsigned char ch0 = utf8[i];
				if ( (ch0 & 0x80) == 0x00 ) {
					utf16 += ((ch0 & 0x7f));
				} else {
					if ((ch0 & 0xe0) == 0xc0) {
						unsigned char ch1 = utf8[++i];
						utf16 += ((ch0 & 0x3f) << 6)|((ch1 & 0x3f));
					} else {
						unsigned char ch1 = utf8[++i];
						unsigned char ch2 = utf8[++i];
						utf16 += ((ch0 & 0x0f)<<12)|((ch1 & 0x3f)<<6)|((ch2 & 0x3f));
					}
				}
			}
			return utf16;
		}
		T   *text_;
		int length_;

#ifdef OS_WINDOWS
		HANDLE handle_;
		HANDLE map_handle_;
#else  // OS_WINDOWS
		int fd_;
		int flag_;
#endif  // OS_WINDOWS
	};

	template <class A, class U = uint32> class MMapFst;
	template <class A, class U> class MMapFst;
	template <class F, class G> void Cast(const F &, G *);

	// States and arcs each implemented by single arrays, templated on the
	// Arc definition. The unsigned type U is used to represent indices into
	// the arc array.

	template<class T>
	T Align(T a, T b) {
		b = b - 1;
		return (a + b) & (~b);
	}

	template <class A, class U>
	class MMapFstImpl : public FstImpl<A> {
	public:
		using FstImpl<A>::SetInputSymbols;
		using FstImpl<A>::SetOutputSymbols;
		using FstImpl<A>::SetType;
		using FstImpl<A>::SetProperties;
		using FstImpl<A>::Properties;
		using FstImpl<A>::WriteHeader;

		typedef A Arc;
		typedef typename A::Weight Weight;
		typedef typename A::StateId StateId;
		typedef U Unsigned;

		MMapFstImpl()
			: states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId),
			mmapped_(false) {
				string type = "mmap";
				if (sizeof(U) != sizeof(uint32)) {
					string size;
					Int64ToStr(8 * sizeof(U), &size);
					type += size;
				}
				SetType(type);
				SetProperties(kNullProperties | kStaticProperties);
		}

		explicit MMapFstImpl(const Fst<A> &fst);

		~MMapFstImpl() {
			if (!mmapped_) {
				delete[] states_;
				delete[] arcs_;
			} else {
				mmap_.Close();
			}
		}

		StateId Start() const { return start_; }

		Weight Final(StateId s) const { return states_[s].final; }

		StateId NumStates() const { return nstates_; }

		size_t NumArcs(StateId s) const { return states_[s].narcs; }

		size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }

		size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }

		static MMapFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts);

		bool Write(ostream &strm, const FstWriteOptions &opts) const;

		A *Arcs(StateId s) { return arcs_ + states_[s].pos; }

		// Provide information needed for generic state iterator
		void InitStateIterator(StateIteratorData<A> *data) const {
			data->base = 0;
			data->nstates = nstates_;
		}

		// Provide information needed for the generic arc iterator
		void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
			data->base = 0;
			data->arcs = arcs_ + states_[s].pos;
			data->narcs = states_[s].narcs;
			data->ref_count = 0;
		}

	private:
		// States implemented by array *states_ below, arcs by (single) *arcs_.
		struct State {
			Weight final;                // Final weight
			Unsigned pos;                // Start of state's arcs in *arcs_
			Unsigned narcs;              // Number of arcs (per state)
			Unsigned niepsilons;         // # of input epsilons
			Unsigned noepsilons;         // # of output epsilons
			State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
		};

		// Properties always true of this Fst class
		static const uint64 kStaticProperties = kExpanded;
		// Current file format version
		static const int kFileVersion = 1;
		// Minimum file format version supported
		static const int kMinFileVersion = 1;
		// Byte alignment for states and arcs in file format
		static const int kFileAlign = 16;

		State *states_;                // States represenation
		A *arcs_;                      // Arcs representation
		StateId nstates_;              // Number of states
		size_t narcs_;                 // Number of arcs (per FST)
		StateId start_;                // Initial state

		Mmap<unsigned char> mmap_;
		bool mmapped_;
		DISALLOW_COPY_AND_ASSIGN(MMapFstImpl);
	};

	template <class A, class U>
	const uint64 MMapFstImpl<A, U>::kStaticProperties;
	template <class A, class U>
	const int MMapFstImpl<A, U>::kFileVersion;
	template <class A, class U>
	const int MMapFstImpl<A, U>::kMinFileVersion;
	template <class A, class U>
	const int MMapFstImpl<A, U>::kFileAlign;


	template<class A, class U>
	MMapFstImpl<A, U>::MMapFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0),
		mmapped_(false) {
		string type = "mmap";
		if (sizeof(U) != sizeof(uint32)) {
			string size;
			Int64ToStr(sizeof(U) * 8, &size);
			type += size;
		}
		SetType(type);
		uint64 copy_properties = fst.Properties(kCopyProperties, true);
		SetProperties(copy_properties | kStaticProperties);
		SetInputSymbols(fst.InputSymbols());
		SetOutputSymbols(fst.OutputSymbols());
		start_ = fst.Start();

		// Count # of states and arcs.
		for (StateIterator< Fst<A> > siter(fst);
			!siter.Done();
			siter.Next()) {
				++nstates_;
				StateId s = siter.Value();
				for (ArcIterator< Fst<A> > aiter(fst, s);
					!aiter.Done();
					aiter.Next())
					++narcs_;
		}
		states_ = new State[nstates_];
		arcs_ = new A[narcs_];
		size_t pos = 0;
		for (StateId s = 0; s < nstates_; ++s) {
			states_[s].final = fst.Final(s);
			states_[s].pos = pos;
			states_[s].narcs = 0;
			states_[s].niepsilons = 0;
			states_[s].noepsilons = 0;
			for (ArcIterator< Fst<A> > aiter(fst, s);
				!aiter.Done();
				aiter.Next()) {
					const A &arc = aiter.Value();
					++states_[s].narcs;
					if (arc.ilabel == 0)
						++states_[s].niepsilons;
					if (arc.olabel == 0)
						++states_[s].noepsilons;
					arcs_[pos++] = arc;
			}
		}
	}



	template<class A, class U>
	MMapFstImpl<A, U> *MMapFstImpl<A, U>::Read(istream &strm,
		const FstReadOptions &opts) {
			MMapFstImpl<A, U> *impl = new MMapFstImpl<A, U>;
			FstHeader hdr;
			if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr))
				return 0;

			impl->start_ = hdr.Start();
			impl->nstates_ = hdr.NumStates();
			impl->narcs_ = hdr.NumArcs();
			if (opts.source.empty() || opts.source == "stdin" ||
				opts.source == "standard input") {
				//Can't mmap stdin or file if we don't know the name
				//so read like a ConstFst				
				impl->states_ = new State[impl->nstates_];
				impl->arcs_ = new A[impl->narcs_];	
				char c;
				for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
					strm.read(&c, 1);				
				size_t b = impl->nstates_ * sizeof(typename MMapFstImpl<A, U>::State);
				strm.read(reinterpret_cast<char *>(impl->states_), b);
				if (!strm) {
					LOG(ERROR) << "MMapFst::Read: Read failed: " << opts.source;
					return 0;
				}							
				for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
					strm.read(&c, 1);
				b = impl->narcs_ * sizeof(A);
				strm.read(reinterpret_cast<char *>(impl->arcs_), b);
				if (!strm) {
					LOG(ERROR) << "MMapFst::Read: Read failed: " << opts.source;
					return 0;
				}
				return impl;
			} else {
				Mmap<unsigned char>& m = impl->mmap_;
				if (!m.Open(opts.source.c_str())) {
					return 0;
				}
				impl->mmapped_ = true;
				size_t a = kFileAlign;
				size_t b = strm.tellg();
				b = Align(b, a);
				typedef typename MMapFstImpl<A, U>::State S;
				impl->states_ = reinterpret_cast<S*>(m.begin() + b);
				b += impl->nstates_ * sizeof(S);
				b = Align(b, a);
				impl->arcs_ = reinterpret_cast<A*>(m.begin() + b);
				b += impl->narcs_ * sizeof(A);
				//Move the stream to the end of the arcs. There might be other
				//data written afterwards in the same stream that is read elsewhere
				strm.seekg(b, ios::beg);
				return impl;
			}
	}

	template<class A, class U>
	bool MMapFstImpl<A, U>::Write(ostream &strm,
		const FstWriteOptions &opts) const {
			FstHeader hdr;
			hdr.SetStart(start_);
			hdr.SetNumStates(nstates_);
			hdr.SetNumArcs(narcs_);
			WriteHeader(strm, opts, kFileVersion, &hdr);

			for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
				strm.write("", 1);
			strm.write(reinterpret_cast<char *>(states_), nstates_ * sizeof(State));
			for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
				strm.write("", 1);
			strm.write(reinterpret_cast<char *>(arcs_), narcs_ * sizeof(A));

			strm.flush();
			if (!strm) {
				LOG(ERROR) << "MMapFst::Write: Write failed: " << opts.source;
				return false;
			}
			return true;
	}


	// Simple concrete immutable FST.  This class attaches interface to
	// implementation and handles reference counting, delegating most
	// methods to ImplToExpandedFst. The unsigned type U is used to
	// represent indices into the arc array (uint32 by default, declared
	// in fst-decl.h).
	template <class A, class U>
	class MMapFst : public ImplToExpandedFst< MMapFstImpl<A, U> > {
	public:
		friend class StateIterator< MMapFst<A, U> >;
		friend class ArcIterator< MMapFst<A, U> >;
		template <class F, class G> void friend Cast(const F &, G *);

		typedef A Arc;
		typedef typename A::StateId StateId;
		typedef MMapFstImpl<A, U> Impl;
		typedef U Unsigned;

		MMapFst() : ImplToExpandedFst<Impl>(new Impl()) {}

		explicit MMapFst(const Fst<A> &fst)
			: ImplToExpandedFst<Impl>(new Impl(fst)) {}

		MMapFst(const MMapFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {}

		// Get a copy of this MMapFst. See Fst<>::Copy() for further doc.
		virtual MMapFst<A, U> *Copy(bool safe = false) const {
			return new MMapFst<A, U>(*this);
		}

		// Read a MMapFst from an input stream; return NULL on error
		static MMapFst<A, U> *Read(istream &strm, const FstReadOptions &opts) {
			Impl* impl = Impl::Read(strm, opts);
			return impl ? new MMapFst<A, U>(impl) : 0;
		}

		// Read a MMapFst from a file; return NULL on error
		// Empty filename reads from standard input
		static MMapFst<A, U> *Read(const string &filename) {
			Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
			return impl ? new MMapFst<A, U>(impl) : 0;
		}

		virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
			GetImpl()->InitStateIterator(data);
		}

		virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
			GetImpl()->InitArcIterator(s, data);
		}

	private:
		explicit MMapFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}

		// Makes visible to friends.
		Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }

		void SetImpl(Impl *impl, bool own_impl = true) {
			ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
		}

		void operator=(const MMapFst<A, U> &fst);  // disallow
	};


	// Specialization for MMapFst; see generic version in fst.h
	// for sample usage (but use the MMapFst type!). This version
	// should inline.
	template <class A, class U>
	class StateIterator< MMapFst<A, U> > {
	public:
		typedef typename A::StateId StateId;

		explicit StateIterator(const MMapFst<A, U> &fst)
			: nstates_(fst.GetImpl()->NumStates()), s_(0) {}

		bool Done() const { return s_ >= nstates_; }

		StateId Value() const { return s_; }

		void Next() { ++s_; }

		void Reset() { s_ = 0; }

	private:
		StateId nstates_;
		StateId s_;

		DISALLOW_COPY_AND_ASSIGN(StateIterator);
	};


	// Specialization for MMapFst; see generic version in fst.h
	// for sample usage (but use the MMapFst type!). This version
	// should inline.
	template <class A, class U>
	class ArcIterator< MMapFst<A, U> > {
	public:
		typedef typename A::StateId StateId;

		ArcIterator(const MMapFst<A, U> &fst, StateId s)
			: arcs_(fst.GetImpl()->Arcs(s)),
			narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {}

		bool Done() const { return i_ >= narcs_; }

		const A& Value() const { return arcs_[i_]; }

		void Next() { ++i_; }

		size_t Position() const { return i_; }

		void Reset() { i_ = 0; }

		void Seek(size_t a) { i_ = a; }

		uint32 Flags() const {
			return kArcValueFlags;
		}

		void SetFlags(uint32 f, uint32 m) {}

	private:
		const A *arcs_;
		size_t narcs_;
		size_t i_;

		DISALLOW_COPY_AND_ASSIGN(ArcIterator);
	};

	// A useful alias when using StdArc.
	typedef MMapFst<StdArc> StdMMapFst;	

}  // namespace fst;

#endif  // FST_LIB_CONST_FST_H__
